home *** CD-ROM | disk | FTP | other *** search
- <?php
- /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
- // +----------------------------------------------------------------------+
- // | PHP version 4                                                        |
- // +----------------------------------------------------------------------+
- // | Copyright (c) 1997-2002 The PHP Group                                |
- // +----------------------------------------------------------------------+
- // | This source file is subject to version 2.0 of the PHP license,       |
- // | that is bundled with this package in the file LICENSE, and is        |
- // | available at through the world-wide-web at                           |
- // | http://www.php.net/license/2_02.txt.                                 |
- // | If you did not receive a copy of the PHP license and are unable to   |
- // | obtain it through the world-wide-web, please send a note to          |
- // | license@php.net so we can mail you a copy immediately.               |
- // +----------------------------------------------------------------------+
- // | Author: George Schlossnagle <george@omniti.com>                      |
- // +----------------------------------------------------------------------+
- //
- // $Id$
-
- /*
- * Text_Statistics calculates some basic readability metrics on a
- * block of text. The number of words, the number of sentences,
- * and the number of total syllables is counted. These statistics
- * can be used to calculate the Flesch score for a sentence, which
- * is a number (usually between 0 and 100) that represents the
- * readability of the text. A basic breakdown of scores is:
- *
- * 90 to 100 5th grade
- * 80 to 90 6th grade
- * 70 to 80 7th grade
- * 60 to 70 8th and 9th grade
- * 50 to 60 10th to 12th grade (high school)
- * 30 to 50 college
- * 0 to 30 college graduate
- *
- * More information is available at
- * http://www.mang.canterbury.ac.nz/courseinfo/AcademicWriting/Flesch.htm
- *
- * require 'Text/Statistics.php';
- * $block = new Text_Statistics($sometext);
- * $block->flesch; // returns flesch score for $sometext
- *
- * see the unit tests for additional examples.
- *
- * @package Text_Statistics
- * @author George Schlossnagle <george@omniti.com>
- */
-
- require_once "Text/Word.php";
-
/**
 * Computes basic readability statistics for a block of text.
 *
 * Constructing an object analyses the text immediately and fills in the
 * public counters (words, unique words, syllables, sentences) together
 * with the Flesch score derived from them.
 */
class Text_Statistics {
    /**
     * The document text.
     *
     * @var string
     * @access public
     */
    var $text = '';

    /**
     * The number of syllables in the document.
     *
     * @var number
     * @access public
     */
    var $numSyllables = 0;

    /**
     * The number of words in the document.
     *
     * @var number
     * @access public
     */
    var $numWords = 0;

    /**
     * The number of unique words in the document (case-insensitive).
     *
     * @var number
     * @access public
     */
    var $uniqWords = 0;

    /**
     * The number of sentences in the document.
     *
     * @var number
     * @access public
     */
    var $numSentences = 0;

    /**
     * The Flesch score of the document. Stays 0 when the document
     * contains no words, because the score is undefined in that case.
     *
     * @var number
     * @access public
     */
    var $flesch = 0;

    /**
     * Abbreviations to expand before counting, as a map of regular
     * expression => replacement. Expanding them also removes the
     * trailing period, so an abbreviation is not counted as the end
     * of a sentence. This list could/should be much larger.
     *
     * NOTE(review): 'Misterr' is either a typo or a deliberate phonetic
     * spelling for the syllable counter, and '/Mr\./' is the only
     * pattern without the case-insensitive flag - confirm before
     * changing either.
     *
     * @var array
     * @access protected
     */
    var $_abbreviations = array('/Mr\./'   => 'Misterr',
                                '/Mrs\./i' => 'Misses', // Phonetic
                                '/etc\./i' => 'etcetera',
                                '/Dr\./i'  => 'Doctor',
                                );

    /**
     * Occurrence count of every word seen so far, keyed by the
     * lower-cased word. Used to maintain $uniqWords.
     *
     * @var array
     * @access protected
     */
    var $_uniques = array();

    /**
     * Constructor. Stores the text and analyses it right away.
     *
     * @param string $block the text to analyse
     * @access public
     */
    function __construct($block)
    {
        $this->text = $block;
        $this->_analyze();
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility.
     *
     * PHP 8 no longer treats a method named after its class as a
     * constructor, so the real work lives in __construct() and this
     * method only forwards to it.
     *
     * @param string $block the text to analyse
     * @access public
     */
    function Text_Statistics($block)
    {
        $this->__construct($block);
    }

    /**
     * Compute statistics for the document object.
     *
     * Feeds the text line by line to _analyze_line() and then derives
     * the Flesch score:
     *
     *   206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
     *
     * @access protected
     */
    function _analyze()
    {
        $lines = explode("\n", (string) $this->text);
        foreach ($lines as $line) {
            $this->_analyze_line($line);
        }

        // Without any words both ratios are undefined; keep the neutral
        // score instead of dividing by zero.
        if ($this->numWords == 0) {
            $this->flesch = 0;
            return;
        }

        // Text that has words but no terminating punctuation is scored
        // as a single sentence. The public counter is left as counted.
        $sentences = max(1, $this->numSentences);

        $this->flesch = 206.835 -
            (1.015 * ($this->numWords / $sentences)) -
            (84.6 * ($this->numSyllables / $this->numWords));
    }

    /**
     * Helper function, accumulates the statistics of a single line into
     * the object's counters.
     *
     * @param string $line one line of the document
     * @access protected
     */
    function _analyze_line($line)
    {
        // expand abbreviations for counting syllables
        $line = preg_replace(array_keys($this->_abbreviations),
                             array_values($this->_abbreviations),
                             $line);

        // A word starts with a word character and may contain
        // apostrophes and hyphens (e.g. "don't", "well-known").
        preg_match_all("/\b(\w[\w'-]*)\b/", $line, $words);
        foreach ($words[1] as $word) {
            // Text_Word comes from Text/Word.php and does the syllable counting.
            $w_obj = new Text_Word($word);
            $this->numSyllables += $w_obj->numSyllables();
            $this->numWords++;

            $key = strtolower($word);
            if (isset($this->_uniques[$key])) {
                $this->_uniques[$key]++;
            } else {
                // first occurrence of this word
                $this->_uniques[$key] = 1;
                $this->uniqWords++;
            }
        }

        // NOTE(review): every terminator character counts as one
        // sentence, so "..." adds three and "3.14" adds one.
        preg_match_all("/[.!?]/", $line, $matches);
        $this->numSentences += count($matches[0]);
    }
}
- ?>
-